{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Gradient Boosting Algorithms - XGBoost"
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "XGBoost is an algorithm that is used for applied machine learning "
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2019-01-01'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-02</th>\n",
              "      <td>3.85</td>\n",
              "      <td>3.98</td>\n",
              "      <td>3.84</td>\n",
              "      <td>3.95</td>\n",
              "      <td>3.95</td>\n",
              "      <td>20548400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume\n",
              "Date                                                    \n",
              "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "      <th>Increase_Decrease</th>\n",
              "      <th>Buy_Sell_on_Open</th>\n",
              "      <th>Buy_Sell</th>\n",
              "      <th>Returns</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0.012658</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>0.032500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>0.012107</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-09</th>\n",
              "      <td>4.20</td>\n",
              "      <td>4.23</td>\n",
              "      <td>4.05</td>\n",
              "      <td>4.09</td>\n",
              "      <td>4.09</td>\n",
              "      <td>30667600</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>-0.021531</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume  Increase_Decrease  \\\n",
              "Date                                                                          \n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200                  1   \n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300                  1   \n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100                  0   \n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700                  0   \n",
              "2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600                  0   \n",
              "\n",
              "            Buy_Sell_on_Open  Buy_Sell   Returns  \n",
              "Date                                              \n",
              "2014-01-03                 1         1  0.012658  \n",
              "2014-01-06                 1         1  0.032500  \n",
              "2014-01-07                 1         0  0.012107  \n",
              "2014-01-08                 0         0  0.000000  \n",
              "2014-01-09                 0         1 -0.021531  "
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": [
              "(1257, 10)"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset[['Open', 'High', 'Low', 'Volume']].values\n",
        "y = dataset['Adj Close'].values"
      ],
      "outputs": [],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import xgboost\n",
        "\n",
        "# XGboost algorithm\n",
        "xgb = xgboost.XGBRegressor(n_estimators=100, learning_rate=0.08, gamma=0, subsample=0.75,\n",
        "                           colsample_bytree=1, max_depth=7)"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)\n",
        "xgb.fit(X_train,y_train)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": [
              "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
              "       colsample_bytree=1, gamma=0, learning_rate=0.08, max_delta_step=0,\n",
              "       max_depth=7, min_child_weight=1, missing=None, n_estimators=100,\n",
              "       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,\n",
              "       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
              "       silent=True, subsample=0.75)"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "xgboost.plot_importance(xgb)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x1903acd3240>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": [
              "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEWCAYAAACOv5f1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHipJREFUeJzt3X2cVWW99/HPbxhRAQNpAIcHnRCCkKfUEtN0PIgiCCrH2yJMUUvFTsR9sKJOWaa97knslFKK+AhoWqgBPqWETpopAgZC4oDKECCCIA/OQMHA7/5jrRk3MMIGZu29Z1/f9+u1X+x1rbX3+l2z9LvXutbaa5u7IyIiYSnIdgEiIpJ5Cn8RkQAp/EVEAqTwFxEJkMJfRCRACn8RkQAp/EX2YGYTzezH2a5DJEmm6/yloZhZJdAO2JnS/Fl3f+8Q3rMUeNDdOx5adY2TmT0ArHL3H2W7Fskv2vOXhjbE3VukPA46+BuCmRVmc/2HwsyaZLsGyV8Kf8kIM+tnZn8zs01mtjDeo6+dd4WZLTGzj8zsXTO7Jm5vDjwDtDezqvjR3sweMLObU15famarUqYrzez7ZvYGUG1mhfHrHjOzD8xsuZmN3ketde9f+95m9j0zW2dma8zsQjMbZGZLzexDM/thymt/amaPmtnv4/68bmZ9UuZ/zszK47/DP8xs6B7rvdPMnjazauAqYATwvbjvT8TLjTOzd+L3f9PMLkp5j5Fm9lczu9XMNsZ9PS9lfmszu9/M3ovnT0+Zd76ZLYhr+5uZ9U57A0ujo/CXxJlZB+Ap4GagNXA98JiZtYkXWQecD3wKuAL4lZmd6O7VwHnAewdxJDEcGAy0AnYBTwALgQ5Af2CMmZ2b5nsdAxwRv/YG4G7gUuAk4MvADWbWOWX5C4BpcV9/B0w3s8PM7LC4jueAtsC3gYfMrFvKa78G/Bw4CpgCPATcEvd9SLzMO/F6WwI3Ag+aWXHKe5wCVABFwC3AvWZm8bypQDPghLiGXwGY2YnAfcA1wKeBu4CZZnZ4mn8jaWQU/tLQpsd7jptS9iovBZ5296fdfZe7zwLmAYMA3P0pd3/HI38hCscvH2Idt7v7SnffBnwBaOPuP3P37e7+LlGAfzXN99oB/NzddwCPEIXqbe7+kbv/A/gHkLqXPN/dH42X/1+iD45+8aMFUBbX8TzwJNEHVa0Z7v5y/Hf6V33FuPs0d38vXub3wDLgiymLrHD3u919JzAZKAbaxR8Q5wHXuvtGd98R/70Bvgnc5e5z3H2nu08G/h3XLHmo0Y6HSs660N3/vEfbccD/MbMhKW2HAS8AxMMSPwE+S7RD0gxYdIh1rNxj/e3NbFNKWxPgpTTfa0McpADb4n/XpszfRhTqe63b3XfFQ1Lta+e5+66UZVcQHVHUV3e9zOwy4L+BkripBdEHUq33U9a/Nd7pb0F0JPKhu2+s522PAy43s2+ntDVNqVvyjMJfMmElMNXdv7nnjHhY4THgMqK93h3xEUPtMEV9l6NVE31A1DqmnmVSX7cSWO7uXQ+m+IPQqfaJmRUAHYHa4apOZlaQ8gFwLLA05bV79ne3aTM7juiopT/wirvvNLMFfPz32peVQGsza+Xum+qZ93N3/3ka7yN5QMM+kgkPAkPM7Fwza2JmR8QnUjsS7V0eDnwA1MRHAeekvHYt8Gkza5nStgAYFJ+8PAYYs5/1vwZsiU8CHxnX0NPMvtBgPdzdSWY2LL7SaAzR8MmrwByiD67vxecASoEhRENJn2QtkHo+oTnRB8IHEJ0sB3qmU5S7ryE6gX6HmR0d13BGPPtu4FozO8Uizc1ssJkdlWafpZFR+Evi3H0l0UnQHxKF1krgu0CBu38EjAb+AGwkOuE5M+W1bwEPA+/G5xHaE520XAhUEp0f+P1+1r+TKGT7AsuB9cA9RCdMkzAD+ApRf74ODIvH17cDQ4nG3dcDdwCXxX38JPcCP
WrPobj7m8AvgVeIPhh6AS8fQG1fJzqH8RbRifYxAO4+j2jc/zdx3W8DIw/gfaWR0Ze8RBqQmf0U6OLul2a7FpF90Z6/iEiAFP4iIgHSsI+ISIC05y8iEqCcvc6/VatW3qVLl2yXkVHV1dU0b94822VkVGh9Dq2/oD5n2vz589e7e5v9LZez4d+uXTvmzZuX7TIyqry8nNLS0myXkVGh9Tm0/oL6nGlmtiKd5TTsIyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIHP3bNdQr2M7d/GCS27LdhkZNbZXDb9cVJjtMjIqtD6H1l8It8/fHnFBVtZtZvPd/eT9Lac9fxGRhFx55ZW0bduWnj171rV9+OGHDBgwgK5duzJgwAA2btwIgLszevRounTpQu/evXn99dcBWLFiBSeddBJ9+/blhBNOYOLEiQ1SW6Lhb2ajzWyJmT1kZreb2dtm9oaZnZjkekVEcsHIkSP505/+tFtbWVkZ/fv3Z9myZfTv35+ysjIAnnnmGZYtW8ayZcuYNGkSo0aNAqC4uJi//e1vLFiwgDlz5lBWVsZ77713yLUlved/HTAIeAjoGj+uBu5MeL0iIll3xhln0Lp1693aZsyYweWXXw7A5ZdfzvTp0+vaL7vsMsyMfv36sWnTJtasWUPTpk05/PDDAfj3v//Nrl27GqS2xMLfzCYCnYGZwB+BKR55FWhlZsVJrVtEJFetXbuW4uIo/oqLi1m3bh0Aq1evplOnTnXLdezYkdWrVwOwcuVKevfuTadOnfj+979P+/btD7mOxM7CuPu1ZjYQOAt4AFiZMnsV0AFYk/oaM7ua6MiAoqI23NCrJqnyclK7I6MTRSEJrc+h9RfC7XN5eTkA77//PtXV1XXTNTU1dc9Tp9evX8/f//53amqiv9XGjRuZP38+VVVVANx+++2sX7+eH//4xxQXF+91RHGgMnUK3upp2+syI3efBEyC6GqfEK8QUJ/zW2j9hXD7fElpKQCVlZU0b96c0ni6Q4cOdOvWjeLiYtasWUP79u0pLS2lT58+FBUV1S1XXV3N0KFD644Saj311FPs2rWrbrmDlamrfVYBnVKmOwKHfsZCRKSRGTp0KJMnTwZg8uTJXHDBBXXtU6ZMwd159dVXadmyJcXFxaxatYpt27YB0dHAyy+/TLdu3Q65jkx9HM8E/svMHgFOATa7+5r9vEZEpFEbPnx43ZBOx44dufHGGxk3bhyXXHIJ9957L8ceeyzTpk0DYNCgQTz99NN06dKFZs2acf/99wOwZMkSxo4di5nh7lx//fX06tXrkGtL9EteZlYJnAxsAH4DDAS2Ale4+7x9vbZbt25eUVGRWG25qLy8/JAP5Rqb0PocWn9Bfc60dL/kleiev7uXpEx+K8l1iYhI+vQNXxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRACn8RUQCpPAXEQmQwl9EJEAKfxGRABVmu4BPsm3HTkrGPZXtMjJqbK8aRqrPeS3f+ltZNpiKigq+8pWv1LW9++67/OxnP+OVV16hoqKCqqoqampqaNWqFQsWLGDDhg1cfPHFzJ07l5EjR/Kb3/wmiz0IV2Lhb2ajgVHAm0B74ETgf
9z91qTWKSKZ161bNxYsWADAzp076dChAxdddBFjxowBoLy8nCeeeIKWLVsCcMQRR3DTTTexePFiFi9enLW6Q5fknv91wHlANXAccGGC6xKRHDB79myOP/54jjvuuLo2d+cPf/gDzz//PADNmzfn9NNP5+23385WmUJCY/5mNhHoDMwERrj7XGBHEusSkdzxyCOPMHz48N3a3njjDdq1a0fXrl2zVJXUJ5E9f3e/1swGAme5+/p0X2dmVwNXAxQVteGGXjVJlJez2h0ZjQmHJLQ+51t/y8vL657v2LGDxx57jPPPP3+39meffZYvfvGLu7UBvPXWW6xevXqv9nxQVVWV8/3KqRO+7j4JmARwbOcu/stFOVVe4sb2qkF9zm/51t/KEaV1z2fMmMEpp5zCsGHD6tpqamoYNmwYkyZNomPHjru/trKSqqoqSktLyTfl5eU53y9d6ikiDeLhhx/ea8jnz3/+M506ddor+CX7DngXxMyOBjq5+xsJ1CMijdDWrVuZNWsWd911127tjzzyCP37999r+ZKSErZs2cL27duZPn06zz33HD169MhUuUKa4W9m5cDQePkFwAdm9hd3/+80XnsMMA/4FLDLzMYAPdx9y0FXLSI5pVmzZmzYsGGv9gceeKDese/Kysrki5J9SnfPv6W7bzGzbwD3u/tPzGyfe/7uXpIyecDHfEce1oSKssEH+rJGrby8fLcx1BCE1ufQ+iu5K90x/0IzKwYuAZ5MsB4REcmAdMP/Z8CzwDvuPtfMOgPLkitLRESSlNawj7tPA6alTL8L/GdSRYmISLLS2vM3s8+a2WwzWxxP9zazHyVbmoiIJCXdYZ+7gR8Q36Ihvszzq0kVJSIiyUo3/Ju5+2t7tOXPd9RFRAKTbvivN7PjAQcws4uBNYlVJSIiiUr3Ov9vEd1zp7uZrQaWAyMSq0pERBK13/A3swLgZHc/28yaAwXu/lHypYmISFL2O+zj7ruA/4qfVyv4RUQav3TH/GeZ2fVm1snMWtc+Eq1MREQSk+6Y/5Xxv99KaXOiX+sSEZFGJt1v+H4m6UJERCRz0r2l82X1tbv7lIYtR0REMiHdYZ8vpDw/AugPvA4o/EVEGqF0h32+nTptZi2BqYlUJCIiiTvY3/DdCnRtyEJERCRz0h3zf4L41g5EHxg9SLnFs4iINC7pjvnfmvK8Bljh7qsSqEdERDIg3WGfQe7+l/jxsruvMrNfJFqZiIgkJt3wH1BP23kNWYiIiGTOPod9zGwUcB3Q2czeSJl1FPBykoWJiEhy9jfm/zvgGeD/AeNS2j9y9w8Tq0pERBK1z/B3983AZmA4gJm1JfqSVwsza+Hu/0y+RBERaWjp/oD7EDNbRvQjLn8BKomOCEREpBFK94TvzUA/YGl8k7f+aMxfRKTRSjf8d7j7BqDAzArc/QWgb4J1iYhIgtL9ktcmM2sBvAQ8ZGbriL7sJSIijVC6e/4XEN3PZwzwJ+AdYEhSRYmISLLSvatntZkdB3R198lm1gxokmxpIiKSlHSv9vkm8ChwV9zUAZieVFEiIpKsdId9vgWcBmwBcPdlQNukihIRkWSlG/7/dvfttRNmVsjHt3gWEZFGJt2rff5iZj8EjjSzAUT3+3kiubJg246dlIx7KslV5JyxvWoYqT7ntUz3t7JscMbWJY1Lunv+44APgEXANcDTwI+SKkpEGt6mTZu4+OKL6d69O5/73Od45ZVXWLhwIaeeeiq9evViyJAhbNmyBYDXXnuNvn370rdvX/r06cMf//jHLFcvDW1/d/U81t3/6e67gLvjR9rMbDQwCjgGWAnsIvp+wBh3/+vBlSwiB+M73/kOAwcO5NFHH2X79u1s3bqVAQMGcOutt3LmmWdy3333MX78eG666SZ69uzJvHnzKCwsZM2aNfTp04chQ4ZQWJjuYIHkuv3t+ddd0WNmjx3E+18HDAI6AX3cvS9wJXDPQbyXiBykLVu28OKLL3LVVVcB0LRpU1q1a
kVFRQVnnHEGAAMGDOCxx6L/zZs1a1YX9P/6178ws+wULonZX/inbvHOB/LGZjYxfs1M4JvuXnuCuDk6WSySUe+++y5t2rThiiuu4POf/zzf+MY3qK6upmfPnsycOROAadOmsXLlyrrXzJkzhxNOOIFevXoxceJE7fXnGfs4k+uZafa6u5+45/O039ysEjjZ3deb2UVEvwvQFhjs7q/Us/zVwNUARUVtTrrh1wc0ytTotTsS1m7LdhWZFVqfM93fXh1aAlBRUcF1113HhAkT6NGjBxMmTKB58+acffbZTJgwgc2bN3Paaafx+OOPM2PGjN3eY8WKFZSVlXHbbbfRtGnTA66hqqqKFi1aNEh/Gots9vmss86a7+4n72+5/YX/TqCa6AjgSKJbPBBPu7t/ap9vnhL+KW1nADe4+9n7eu2xnbt4wSW37a/+vDK2Vw2/XBTW3lVofc50f2uv9nn//ffp168flZWVALz00kuUlZXx1FMfX3m0dOlSLr30Ul577bW93uess85i/PjxnHzyfjNlL+Xl5ZSWlh5U/Y1VNvtsZmmF/z6Hfdy9ibt/yt2PcvfC+Hnt9D6Dfx/v+SJwvJkVHczrReTAHXPMMXTq1ImKigoAZs+eTY8ePVi3bh0Au3bt4uabb+baa68FYPny5dTURPduXLFiBRUVFZSUlGSldklGRnZBzKwL8I67u5mdCDQFNmRi3SISmTBhAiNGjGD79u107tyZ+++/nylTpvDb3/4WgGHDhnHFFVcA8Ne//pWysjIOO+wwCgoKuOOOOygq0v5aPtnnsM8hv3k87ANcBVwG7AC2Ad/d36We3bp189q9lFDo8Dj/hdZfUJ8zLd1hn0T3/N29JH76i/ghIiI5IN1v+IqISB5R+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgFS+IuIBEjhLyISIIW/iEiAFP4iIgEqzHYBn2Tbjp2UjHsq22Vk1NheNYxUnxutyrLBAJSUlHDUUUfRpEkTCgsLmTdvHgATJkxg/PjxHHXUUQwePJhbbrmF7du3c8011zBv3jwKCgq47bbbKC0tzWIvJBSJhb+ZjQZGAd2BRXFzFTDK3RcmtV6RXPDCCy9QVFS02/SMGTO45557OOecc1i3bh0Ad999NwCLFi1i3bp1nHfeecydO5eCAh2US7KS/C/sOmAQcBpwprv3Bm4CJiW4TpGcdOeddzJu3DiaNm0KQNu2bQF488036d+/f11bq1at6o4URJKUSPib2USgMzATOMXdN8azXgU6JrFOkVxhZpxzzjmcdNJJTJoU7essXbqUl156iVGjRnHmmWcyd+5cAPr06cOMGTOoqalh+fLlzJ8/n5UrV2azfAlEIsM+7n6tmQ0EznL39SmzrgKe+aTXmdnVwNUARUVtuKFXTRLl5ax2R0Zj4CHJpz6Xl5cDMH78eIqKiti4cSPXX38927ZtY/PmzSxatIhbbrmFVatWMXToUH73u99x/PHHM2vWLLp37067du3o3r07S5YsqXuvfFBVVZVX/UlHY+hzxk74mtlZROF/+ict4+6TiIeFju3cxX+5KGfPRydibK8a1OfGq3JE6V5tCxcuZMeOHXTr1o3Ro0cDMGrUKG699VZ69uxJmzZt6oZ9AL70pS8xbNgwevTokamyE1deXh7cSezG0OeMnFUys97APcAF7r4hE+sUyYbq6mo++uijuufPPfccPXv25MILL+T5558HoiGg7du3U1RUxNatW6murgZg1qxZFBYW5lXwS+5KfJfLzI4FHge+7u5Lk16fSDatXbuWiy66CICamhq+9rWvMXDgQLZv386VV17J1KlTOfroo5k8e
TJmxrp16zj33HMpKCigQ4cOTJ06Ncs9kFBk4nj7BuDTwB1mBlDj7idnYL0iGde5c2cWLtz7SuamTZvy4IMP7jUcUFJSQkVFRQYrFIkkFv7uXhI//Ub8OCBHHtaEivhLM6EoLy+vd9w4n4XYZ5FcoG+SiIgESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIghb+ISIAU/iIiAVL4i4gESOEvIhIgc/ds11AvM/sIqMh2HRlWBKzPdhEZFlqfQ+svqM+Zdpy7t9nfQoWZqOQgVbj7ydkuIpPMbJ76nN9C6y+oz7lKwz4iIgFS+IuIBCiXw39StgvIAvU5/4XWX1Cfc1LOnvAVEZHk5PKev4iIJEThLyISoJwMfzMbaGYVZva2mY3Ldj0Nwcw6mdkLZrbEzP5hZt+J21ub2SwzWxb/e3TcbmZ2e/w3eMPMTsxuDw6emTUxs7+b2ZPx9GfMbE7c59+bWdO4/fB4+u14fkk26z5YZtbKzB41s7fi7X1qPm9nM/u/8X/Ti83sYTM7It+2sZndZ2brzGxxStsBb1MzuzxefpmZXZ6NvtTKufA3sybAb4HzgB7AcDPrkd2qGkQNMNbdPwf0A74V92scMNvduwKz42mI+t81flwN3Jn5khvMd4AlKdO/AH4V93kjcFXcfhWw0d27AL+Kl2uMbgP+5O7dgT5Efc/L7WxmHYDRwMnu3hNoAnyV/NvGDwAD92g7oG1qZq2BnwCnAF8EflL7gZEV7p5TD+BU4NmU6R8AP8h2XQn0cwYwgOhbzMVxWzHRl9sA7gKGpyxft1xjegAdif7H+A/gScCIvvlYuOf2Bp4FTo2fF8bLWbb7cID9/RSwfM+683U7Ax2AlUDreJs9CZybj9sYKAEWH+w2BYYDd6W077Zcph85t+fPx/8x1VoVt+WN+FD388AcoJ27rwGI/20bL5Yvf4dfA98DdsXTnwY2uXtNPJ3ar7o+x/M3x8s3Jp2BD4D746Gue8ysOXm6nd19NXAr8E9gDdE2m09+b+NaB7pNc2pb52L4Wz1teXM9qpm1AB4Dxrj7ln0tWk9bo/o7mNn5wDp3n5/aXM+insa8xqIQOBG4090/D1Tz8XBAfRp1n+NhiwuAzwDtgeZEwx57yqdtvD+f1Mec6nsuhv8qoFPKdEfgvSzV0qDM7DCi4H/I3R+Pm9eaWXE8vxhYF7fnw9/hNGComVUCjxAN/fwaaGVmtfeVSu1XXZ/j+S2BDzNZcANYBaxy9znx9KNEHwb5up3PBpa7+wfuvgN4HPgS+b2Nax3oNs2pbZ2L4T8X6BpfLdCU6OTRzCzXdMjMzIB7gSXu/r8ps2YCtWf9Lyc6F1Dbfll85UA/YHPtIWZj4e4/cPeO7l5CtB2fd/cRwAvAxfFie/a59m9xcbx8o9ordPf3gZVm1i1u6g+8Sf5u538C/cysWfzfeG1/83YbpzjQbfoscI6ZHR0fMZ0Tt2VHtk+ifMKJlUHAUuAd4H+yXU8D9el0okO8N4AF8WMQ0XjnbGBZ/G/reHkjuurpHWAR0dUUWe/HIfS/FHgyft4ZeA14G5gGHB63HxFPvx3P75ztug+yr32BefG2ng4cnc/bGbgReAtYDEwFDs+3bQw8THROYwfRHvxVB7NNgSvjvr8NXJHNPun2DiIiAcrFYR8REUmYwl9EJEAKfxGRACn8RUQCpPAXEQlQLv+Au0gizGwn0SV4tS5098oslSOSFbrUU4JjZlXu3iKD6yv0j+9zI5ITNOwjsgczKzazF81sQXyP+i/H7QPN7HUzW2hms+O21mY2Pb5v+6tm1
jtu/6mZTTKz54ApFv2mwXgzmxsve00WuyiiYR8J0pFmtiB+vtzdL9pj/teIbkH88/j3JZqZWRvgbuAMd18e35sdom+3/t3dLzSz/wCmEH3DF+Ak4HR332ZmVxN9zf8LZnY48LKZPefuy5PsqMgnUfhLiLa5e999zJ8L3BffiG+6uy8ws1Lgxdqwdvfam5GdDvxn3Pa8mX3azFrG82a6+7b4+TlAbzOrvd9NS6If+1D4S1Yo/EX24O4vmtkZwGBgqpmNBzZR/+1393Wb3uo9lvu2u2fvRl4iKTTmL7IHMzuO6HcI7ia6E+uJwCvAmWb2mXiZ2mGfF4ERcVspsN7r/52GZ4FR8dEEZvbZ+EdeRLJCe/4ieysFvmtmO4Aq4DJ3/yAet3/czAqI7t0+APgp0a92vQFs5eNb/O7pHqKfAXw9vvXxB8CFSXZCZF90qaeISIA07CMiEiCFv4hIgBT+IiIBUviLiARI4S8iEiCFv4hIgBT+IiIB+v/OUydBPJSsMwAAAABJRU5ErkJggg==\n"
            ],
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from xgboost import XGBClassifier\n",
        "\n",
        "# feature importance\n",
        "print(xgb.feature_importances_)\n",
        "# plot\n",
        "plt.bar(range(len(xgb.feature_importances_)), xgb.feature_importances_)\n",
        "plt.show()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[ 0.33036891  0.25395256  0.18741766  0.22826087]\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": [
              "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEbFJREFUeJzt3X+s3Xddx/Hny27djKhs7Kpka2kHNVJ+uOGlICgSGdCxZCURQjHEkcw0KE0wxMQSTBdLFgckaNQRNqUJErGMYeQqXZYBQ2PIoBcYG92suyuT3ZS4QueQABsdb/843+HZ4XT3e3/0njs/z0dycr/fz/fzOed9P+33db/3e873e1NVSJLa8BOTLkCStHoMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDzph0AaPOO++82rRp06TLkKQnlS9+8YvfrKqphfqtudDftGkTs7Ozky5Dkp5Ukvxnn36e3pGkhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIasuStyl2vTnk9OuoSJuu+ayyZdgqQ1zCN9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQXqGfZHuSI0nmkuwZs/0tSe5McnuSf0uydWjbO7pxR5K8eiWLlyQtzoKhn2QdcC1wKbAVeONwqHc+UlXPq6qLgPcA7+vGbgV2As8BtgPv755PkjQBfY70twFzVXW0qh4BDgA7hjtU1beHVn8KqG55B3Cgqh6uqq8Bc93zSZImoM9fzjofuH9ofR540WinJG8F3g6sB35zaOxtI2PPX1KlkqRl63OknzFt9WMNVddW1TOBPwL+eDFjk+xKMptk9vjx4z1KkiQtRZ/Qnwc2DK1fABx7gv4HgNcuZmxVXV9V01U1PTU11aMkSdJS9An9Q8CWJJuTrGfwxuzMcIckW4ZWLwPu6ZZngJ1JzkqyGdgCfGH5ZUuSlmLBc/pVdTLJbuBmYB2wv6oOJ9kHzFbVDLA7ySXAD4AHgSu6sYeT3ADcBZwE3lpVj56m70WStIA+b+RSVQeBgyNte4eW3/YEY68Grl5qgZKkleMVuZLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSG9Qj/J9iRHkswl2TNm+9uT3JXkjiSfTvKMoW2PJrm9e8ysZPGSpMU5Y6EOSdYB1wKvBOaBQ0lmququoW5fBqar6rtJfg94D/CGbtv3quqiFa5bkrQEC4Y+sA2Yq6qjAEkOADuAH4V+Vd061P824E0rWaRWz6Y9n5x0CRN13zWXTboE6bTqc3rnfOD+ofX5ru1UrgRuGlo/O8lsktuSvHYJNUqSVkifI/2MaauxHZM3AdPAbww1b6yqY0kuBD6T5M6qundk3C5gF8DGjRt7FS5JWrw+R/rzwIah9QuAY6OdklwCvBO4vKoefqy9qo51X48CnwUuHh1bVddX1XRVTU9NTS3qG5Ak9dcn9A8BW5JsTrIe2Ak87lM4SS4GrmMQ+A8MtZ+T5Kxu+TzgpQy9FyBJWl0Lnt6pqpNJdgM3A+uA/VV1OMk+YLaqZoD3Ak8BPpYE4OtVdTnwbOC6JD9k8APmmpFP/UiSVlGfc/pU1UHg4Ejb3qHlS04x7nPA85ZToCRp5XhFriQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNaTXFbmStBr8ew6n/+85eKQvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkF6hn2R7kiNJ5pLsGbP97UnuS
nJHkk8necbQtiuS3NM9rljJ4iVJi7Ng6CdZB1wLXApsBd6YZOtIty8D01X1fOBG4D3d2HOBq4AXAduAq5Kcs3LlS5IWo8+R/jZgrqqOVtUjwAFgx3CHqrq1qr7brd4GXNAtvxq4papOVNWDwC3A9pUpXZK0WH1C/3zg/qH1+a7tVK4EblrM2CS7kswmmT1+/HiPkiRJS9En9DOmrcZ2TN4ETAPvXczYqrq+qqaranpqaqpHSZKkpegT+vPAhqH1C4Bjo52SXAK8E7i8qh5ezFhJ0uroE/qHgC1JNidZD+wEZoY7JLkYuI5B4D8wtOlm4FVJzunewH1V1yZJmoAF/zB6VZ1MsptBWK8D9lfV4ST7gNmqmmFwOucpwMeSAHy9qi6vqhNJ3sXgBwfAvqo6cVq+E0nSghYMfYCqOggcHGnbO7R8yROM3Q/sX2qBkqSV4xW5ktQQQ1+SGtLr9I6kfjbt+eSkS5io+665bNIlaAEe6UtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kN6RX6SbYnOZJkLsmeMdtfluRLSU4med3ItkeT3N49ZlaqcEnS4i34h9GTrAOuBV4JzAOHksxU1V1D3b4OvBn4wzFP8b2qumgFapUkLdOCoQ9sA+aq6ihAkgPADuBHoV9V93XbfngaapQkrZA+p3fOB+4fWp/v2vo6O8lsktuSvHZR1UmSVlSfI/2MaatFvMbGqjqW5ELgM0nurKp7H/cCyS5gF8DGjRsX8dSSpMXoc6Q/D2wYWr8AONb3BarqWPf1KPBZ4OIxfa6vqumqmp6amur71JKkReoT+oeALUk2J1kP7AR6fQonyTlJzuqWzwNeytB7AZKk1bVg6FfVSWA3cDNwN3BDVR1Osi/J5QBJXphkHng9cF2Sw93wZwOzSb4C3ApcM/KpH0nSKupzTp+qOggcHGnbO7R8iMFpn9FxnwOet8waJUkrxCtyJakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ3qFfpLtSY4kmUuyZ8z2lyX5UpKTSV43su2KJPd0jytWqnBJ0uItGPpJ1gHXApcCW4E3Jtk60u3rwJuBj4yMPRe4CngRsA24Ksk5yy9bkrQUfY70twFzVXW0qh4BDgA7hjtU1X1VdQfww5GxrwZuqaoTVfUgcAuwfQXqliQtQZ/QPx+4f2h9vmvrYzljJUkrrE/oZ0xb9Xz+XmOT7Eoym2T2+PHjPZ9akrRYfUJ/HtgwtH4BcKzn8/caW1XXV9V0VU1PTU31fGpJ0mL1Cf1DwJYkm5OsB3YCMz2f/2bgVUnO6d7AfVXXJkmagAVDv6pOArsZhPXdwA1VdTjJviSXAyR5YZJ54PXAdUkOd2NPAO9i8IPjELCva5MkTcAZfTpV1UHg4Ejb3qHlQwxO3Ywbux/Yv4waJUkrxCtyJakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWpIr9BPsj3JkSRzSfaM2X5Wko922z+fZFPXvinJ95Lc3j0+sLLlS5IW44yFOiRZB1wLvBKYBw4lmamqu4a6XQk8WFXPSrITeDfwhm7bvVV10QrXLUlagj5H+tuAuao6WlWPAAeAHSN9dgAf6pZvBF6RJCtXpiRpJfQJ/fOB+4fW57u2sX2q6iTwEPC0btvmJF9O8i9Jfn3cCyTZlWQ2yezx48cX9Q1IkvrrE/rjjtirZ59vABur6mLg7cBHkvzMj3Wsur6qp
qtqempqqkdJkqSl6BP688CGofULgGOn6pPkDOBngRNV9XBVfQugqr4I3Av84nKLliQtTZ/QPwRsSbI5yXpgJzAz0mcGuKJbfh3wmaqqJFPdG8EkuRDYAhxdmdIlSYu14Kd3qupkkt3AzcA6YH9VHU6yD5itqhngg8CHk8wBJxj8YAB4GbAvyUngUeAtVXXidHwjkqSFLRj6AFV1EDg40rZ3aPn7wOvHjPs48PFl1ihJWiFekStJDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhrSK/STbE9yJMlckj1jtp+V5KPd9s8n2TS07R1d+5Ekr1650iVJi7Vg6CdZB1wLXApsBd6YZOtItyuBB6vqWcCfAe/uxm4FdgLPAbYD7++eT5I0AX2O9LcBc1V1tKoeAQ4AO0b67AA+1C3fCLwiSbr2A1X1cFV9DZjrnk+SNAF9Qv984P6h9fmubWyfqjoJPAQ8redYSdIqOaNHn4xpq559+owlyS5gV7f6nSRHnqCe84BvPsH2SZtofXn3gl2cvyfg/C2P87c8y5y/Z/R5jT6hPw9sGFq/ADh2ij7zSc4AfhY40XMsVXU9cH2fgpPMVtV0n76TYH3LY33LY33L00J9fU7vHAK2JNmcZD2DN2ZnRvrMAFd0y68DPlNV1bXv7D7dsxnYAnxhOQVLkpZuwSP9qjqZZDdwM7AO2F9Vh5PsA2aragb4IPDhJHMMjvB3dmMPJ7kBuAs4Cby1qh49Td+LJGkBfU7vUFUHgYMjbXuHlr8PvP4UY68Grl5GjaN6nQaaIOtbHutbHutbnv/39WVwFkaS1AJvwyBJDVnzoZ/k3CS3JLmn+3rOKfo9muT27jH6RvNK17Tk21Kshh71vTnJ8aH5+t1Vrm9/kgeSfPUU25PkL7r670jygjVW38uTPDQ0f3vH9TuN9W1IcmuSu5McTvK2MX0mNoc965vYHCY5O8kXknylq+9PxvSZ2D7cs76l78NVtaYfwHuAPd3yHuDdp+j3nVWqZx1wL3AhsB74CrB1pM/vAx/olncCH13F+epT35uBv5rgv+nLgBcAXz3F9tcANzG4zuPFwOfXWH0vB/55gvP3dOAF3fJPA/8x5t94YnPYs76JzWE3J0/pls8EPg+8eKTPJPfhPvUteR9e80f6PP4WDx8CXjvBWmB5t6VYK/VNVFX9K4NPeZ3KDuBva+A24KlJnr461fWqb6Kq6htV9aVu+X+Au/nxK90nNoc965uYbk6+062e2T1G39yc2D7cs74lezKE/s9X1Tdg8J8J+LlT9Ds7yWyS25Kczh8My7ktxWroe+uL3+p+7b8xyYYx2yfpyXD7jl/tfv2+KclzJlVEd9rhYgZHg8PWxBw+QX0wwTlMsi7J7cADwC1Vdcr5m8A+3Kc+WOI+vCZCP8mnknx1zGMxR6gba3Cl2m8Df57kmaer3DFtfW9LsRr6vPY/AZuq6vnAp/i/I5q1YpLz18eXgGdU1S8Dfwn84ySKSPIU4OPAH1TVt0c3jxmyqnO4QH0TncOqerSqLmJwl4BtSZ470mWi89ejviXvw2si9Kvqkqp67pjHJ4D/euzX0u7rA6d4jmPd16PAZxkcXZwOi7ktBXn8bSlWw4L1VdW3qurhbvWvgV9Zpdr66nX7jkmpqm8/9ut3Da5hOTPJeatZQ5IzGQTq31XVP4zpMtE5XKi+tTCH3Wv/N4O82D6yaZL78I+cqr7l7MNrIvQXMHyLhyuAT4x2SHJOkrO65fOAlzK4Cvh0WM5tKVbDgvWNnNu9nME517VkBvid7hMoLwYeeuwU31qQ5BceO7+bZBuD/ehbq/j6YXAV/N1V9b5TdJvYHPapb5JzmGQqyVO75Z8ELgH+faTbxPbhPvUtax9erXekl/pgcB7t08A93ddzu/Zp4
G+65ZcAdzL4pMqdwJWnuabXMPhEwr3AO7u2fcDl3fLZwMcY/P2ALwAXrvKcLVTfnwKHu/m6FfilVa7v74FvAD9gcER1JfAW4C3d9jD4wz33dv+e02usvt1D83cb8JJVru/XGJxquAO4vXu8Zq3MYc/6JjaHwPOBL3f1fRXY27WviX24Z31L3oe9IleSGvJkOL0jSVohhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ35XzXAik+SFyuSAAAAAElFTkSuQmCC\n"
            ],
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import explained_variance_score\n",
        "\n",
        "# Predict on the held-out features with the `xgb` model from an earlier cell\n",
        "predictions = xgb.predict(X_test)\n",
        "\n",
        "# scikit-learn metrics take (y_true, y_pred). Explained variance is normalised\n",
        "# by the variance of y_true, so the ground truth must be the first argument.\n",
        "print(explained_variance_score(y_test, predictions))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0.997974320107\n"
          ]
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Missing-value imputation via sklearn.impute.SimpleImputer\n",
        "# (used instead of `from sklearn.preprocessing import Imputer`)\n",
        "from sklearn.impute import SimpleImputer\n",
        "\n",
        "# Fit the imputer on the training features only, then apply that same\n",
        "# fitted imputer to both the training and the test features.\n",
        "my_imputer = SimpleImputer().fit(X_train)\n",
        "X_train = my_imputer.transform(X_train)\n",
        "X_test = my_imputer.transform(X_test)"
      ],
      "outputs": [],
      "execution_count": 11,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from xgboost import XGBRegressor\n",
        "\n",
        "model = XGBRegressor()\n",
        "# Pass verbose=False to fit() to avoid printing out updates with each cycle\n",
        "model.fit(X_train, y_train, verbose=False)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 12,
          "data": {
            "text/plain": [
              "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
              "       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n",
              "       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n",
              "       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,\n",
              "       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
              "       silent=True, subsample=1)"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 12,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "In statistics, mean absolute error is a measure of difference between two continuous variables. Assume X and Y are variables of paired observations that express the same phenomenon. (Wikipedia)       \n",
        "\n",
        "The smaller the mean absolute error (MAE), the better the forecast."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import mean_absolute_error\n",
        "\n",
        "# make predictions on the test features with the fitted model\n",
        "predictions = model.predict(X_test)\n",
        "\n",
        "# scikit-learn metrics take (y_true, y_pred): ground truth first, predictions second\n",
        "print(\"Mean Absolute Error : \" + str(mean_absolute_error(y_test, predictions)))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mean Absolute Error : 0.128388661268\n"
          ]
        }
      ],
      "execution_count": 13,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Evaluate the fitted model on the test split with its score() method\n",
        "test_score = model.score(X_test, y_test)\n",
        "print('XGBoost Regression Score:', test_score)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "XGBoost Regression Score: 0.998093563835\n"
          ]
        }
      ],
      "execution_count": 14,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "mimetype": "text/x-python",
      "version": "3.5.5",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "file_extension": ".py",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      }
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.15.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}